import re
import json
import os
from collections import defaultdict
import pandas as pd
import selenium  #This is equivalent to the R command library(selenium)
from selenium import webdriver #Importing the function we need
from bs4 import BeautifulSoup
import time


#Run file in the script directory

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

NA = 'NA'

DOCKET_DIR = 'Data/Processed_Data/Lower_Court_Dockets/'
DOCKET_DAY_ACTION_FILE = 'Data/Processed_Data/Docket_Day_Action.xlsx'
OUTPUT_FILE = 'Data/Processed_Data/Docket_Information_1997_2024.xlsx'
LINK_START = 'https://www.supremecourt.gov/search.aspx?filename=/docket/docketfiles/html/public/'

# Columns produced for every docket (after the leading 'docket' column).
RECORD_COLUMNS = (
    'case_name',
    'lower_ct',
    'lower_docket',
    'petitioner_counsel',
    'sg_petitioner',
    'respondent_counsel',
    'sg_respondent',
    'other_parties',
)

# The attorney section is lower-cased before matching, so the patterns are too.
DOJ_FULL = re.compile(r'united\s*states\s*department\s*of\s*justice')
DOJ_ABBREVIATED = re.compile(r'u\.s\.\s*department\s*of\s*justice')


# ---------------------------------------------------------------------------
# Parsing helpers
# ---------------------------------------------------------------------------

def _capture(pattern, text, flags=0):
    """Return group 1 of the first match of *pattern* in *text*, or None."""
    found = re.search(pattern, text, flags)
    return None if found is None else found.group(1)


def _header_field(pattern, header):
    """Return the first multi-line capture with newlines trimmed, or 'NA'."""
    value = _capture(pattern, header, re.DOTALL)
    return NA if value is None else value.strip('\n')


def _party(attorneys, labels, doj, want_respondent):
    """Extract one side's counsel text and its Department of Justice flag.

    *labels* is an ordered sequence of ``(start, end, boundary)`` tuples that
    are tried in turn; the first whose ``start`` label occurs in *attorneys*
    wins.  The counsel text runs from ``start`` to ``end`` (or to the end of
    the section when ``end`` is None).  ``boundary`` marks where the
    petitioner-side text stops: the petitioner flag is set when *doj* matches
    before it, the respondent flag when *doj* matches somewhere in the section
    but not before it.

    Returns ``(counsel, flag)`` with flag 'True'/'False', or ``('NA', 'NA')``
    when no label is present.
    """
    for start, end, boundary in labels:
        if start not in attorneys:
            continue
        counsel = attorneys.split(start)[1]
        if end is not None:
            counsel = counsel.split(end)[0]
        doj_before_boundary = doj.search(attorneys.split(boundary)[0]) is not None
        if want_respondent:
            flag = doj.search(attorneys) is not None and not doj_before_boundary
        else:
            flag = doj_before_boundary
        return counsel, str(flag)
    return NA, NA


def _parse_parties(text, marker, doj, petitioner_labels, respondent_labels,
                   respondent_required=None):
    """Parse the attorney section that follows *marker* in the raw page text.

    Returns a dict with the five party-related columns.  Every column is 'NA'
    when the marker is absent; individual columns are 'NA' when their labels
    are absent.
    """
    parties = {
        'petitioner_counsel': NA,
        'sg_petitioner': NA,
        'respondent_counsel': NA,
        'sg_respondent': NA,
        'other_parties': NA,
    }
    if marker not in text:
        return parties

    section = text.split(marker)[1].split('LOC[')[0].lower()
    pieces = section.split('other attorneys')
    attorneys = pieces[0]

    parties['petitioner_counsel'], parties['sg_petitioner'] = _party(
        attorneys, petitioner_labels, doj, want_respondent=False)

    # NOTE(review): the modern-format code only ever recorded respondent
    # counsel when the literal 'respondents\n' occurred in the section (via an
    # otherwise unused split).  That filter is preserved here; confirm whether
    # singular "Respondent" headings should really yield 'NA'.
    if respondent_required is None or respondent_required in attorneys:
        parties['respondent_counsel'], parties['sg_respondent'] = _party(
            attorneys, respondent_labels, doj, want_respondent=True)

    if len(pieces) > 1:
        parties['other_parties'] = pieces[1].split('{1}')[0]
    return parties


def parse_modern_docket(text):
    """Parse a docket page in the layout used by the 2017-2024 files.

    Also used for freshly scraped pages.  Returns a dict keyed by
    RECORD_COLUMNS, or None when the page lacks the expected header markers
    (such dockets are skipped entirely).
    """
    if not isinstance(text, str) or 'Search documents in this case' not in text:
        return None
    header = text.split('Search documents in this case')[1].split('Proceedings and Orders')[0]

    record = {
        'case_name': _header_field('Title:(.+?)Docketed', header),
        'lower_ct': _header_field('Lower Ct:(.+?)\xa0', header),
        'lower_docket': _header_field('Case Numbers:(.+?)\xa0', header),
    }
    record.update(_parse_parties(
        text,
        marker='\n\n\nAttorneys\n\n',
        doj=DOJ_FULL,
        petitioner_labels=(
            ('attorneys for petitioner', 'attorneys for respondent', 'respondent'),
            ('attorneys for appellant', 'attorneys for appellee', 'appellee'),
        ),
        respondent_labels=(
            ('attorneys for respondent', '{1}', 'respondent'),
            ('attorneys for appellee', '{1}', 'appellee'),
        ),
        respondent_required='respondents\n',
    ))
    return record


def parse_2001_2016_docket(text):
    """Parse a docket page in the layout used by the 2001-2016 file.

    Returns a dict keyed by RECORD_COLUMNS, or None when 'Search Results' is
    missing from the page.
    """
    if not isinstance(text, str):
        return None
    flat = text.replace('\n', '')
    if 'Search Results' not in flat:
        return None
    header = flat.split('Search Results')[1].split('~Proceedings')[0]

    # Two layouts exist for the lower-court case number; try them in order.
    lower_docket = _capture('Case Nos.:(.+?)\xa0', header)
    if lower_docket is None:
        lower_docket = _capture(r'\d{4}\s+\((.+?)\).+?~', header)

    record = {
        'case_name': _header_field('Title:(.+?)Docketed', header),
        'lower_ct': _header_field('Lower Ct:(.+?)\xa0', header),
        'lower_docket': NA if lower_docket is None else lower_docket,
    }
    record.update(_parse_parties(
        text,
        marker='Phone~',
        doj=DOJ_FULL,
        petitioner_labels=(
            ('for petitioner', 'attorneys for respondent', 'respondent'),
            ('for appellant', 'attorneys for appellee', 'appellee'),
        ),
        respondent_labels=(
            ('for respondent', '{1}', 'respondent'),
            ('for appellee', '{1}', 'appellee'),
        ),
    ))
    return record


def parse_1996_2001_docket(text):
    """Parse a docket page in the layout used by the 1996-2001 file.

    Returns a dict keyed by RECORD_COLUMNS, or None when *text* is not a
    string.
    """
    if not isinstance(text, str):
        return None
    header = text.replace('\n', '').split('~Proceedings')[0]

    lower_docket = _capture(r'Case Nos.:\((.+?)\)', header)
    record = {
        'case_name': _header_field('Title:(.+?)Docketed', header),
        'lower_ct': _header_field('Lower Ct:(.+?)Case Nos', header),
        'lower_docket': NA if lower_docket is None else lower_docket,
    }
    record.update(_parse_parties(
        text,
        marker='Phone~',
        doj=DOJ_ABBREVIATED,
        petitioner_labels=(
            ('for petitioner', 'attorneys for respondent', 'respondent'),
        ),
        respondent_labels=(
            ('for respondent:', None, 'respondent'),
        ),
    ))
    return record


# ---------------------------------------------------------------------------
# Pipeline steps
# ---------------------------------------------------------------------------

def collect_dockets(docket_info, dockets, parser):
    """Append one row per parseable docket in *dockets* to *docket_info*.

    *docket_info* maps column name -> list and is mutated in place; columns
    are always appended in the same order so every list stays the same length.
    """
    for docket, text in dockets.items():
        record = parser(text)
        if record is None:
            continue
        docket_info['docket'].append(docket)
        for column in RECORD_COLUMNS:
            docket_info[column].append(record[column])


def docket_url(docket):
    """Return the supremecourt.gov docket URL for a cleaned docket number."""
    if '–' in docket:
        # Cleaned dockets use an en dash; the site expects an ASCII hyphen.
        # Assumes a two-character term prefix before the dash.
        return LINK_START + docket[:2] + '-' + docket[3:] + '.html'
    return LINK_START + docket + '.html'


def clean_entry(entry):
    """Collapse whitespace and normalise dashes in one output cell."""
    entry = re.sub(r'\s+', ' ', entry).strip()
    entry = entry.replace('–', '-')
    # NOTE(review): as written this replaces a character with itself; the
    # original may have targeted another dash-like character - confirm.
    entry = entry.replace('-', '-')
    return entry


def _load_json(path):
    """Read and return the JSON document stored at *path*."""
    with open(path, 'r') as file:
        return json.load(file)


def _find_missing_dockets(docket_info):
    """Return dockets listed in the day-action sheet that lack parsed counsel."""
    parsed = {
        docket.replace('-', '–')
        for docket, sg_flag in zip(docket_info['docket'], docket_info['sg_petitioner'])
        if str(sg_flag) != NA
    }
    data = pd.read_excel(DOCKET_DAY_ACTION_FILE)
    data = data[(data['term'] <= 25)]
    data = data[(data['term'] > 0)]
    return list(set(data['cleaned_docket']) - parsed)


def _scrape_dockets(missing_dockets):
    """Download the page text for each missing docket with a Chrome driver.

    Dockets containing 'D' are skipped, as in the original script.  The
    browser is always closed, even when a page load fails.
    """
    driver = webdriver.Chrome()  # Load our browser
    scraped = {}
    try:
        for docket in missing_dockets:
            docket = str(docket)
            if 'D' in docket or docket in scraped:
                continue
            time.sleep(1)  # throttle requests to the court's site
            driver.get(docket_url(docket))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            scraped[docket] = soup.get_text()
    finally:
        driver.quit()
    return scraped


def main():
    """Build Docket_Information_1997_2024.xlsx from the raw docket text files."""
    # Run file in the script directory: the working directory is moved up by
    # dropping the last 7 characters of the current path.
    # NOTE(review): presumably this strips the script folder's name - confirm.
    os.chdir(os.getcwd()[:-7])

    docket_info = defaultdict(list)
    sources = (
        ('Supreme_Court_Dockets_Raw_Text_2017_2024.json', parse_modern_docket),
        ('Supreme_Court_Dockets_Raw_Text_2001_2016.json', parse_2001_2016_docket),
        ('Supreme_Court_Dockets_Raw_Text_1996_2001.json', parse_1996_2001_docket),
    )
    for filename, parser in sources:
        collect_dockets(docket_info, _load_json(DOCKET_DIR + filename), parser)

    # Scrape whatever the stored files did not cover, save it, then parse the
    # saved copy so the table reflects exactly what is on disk.
    missing_path = DOCKET_DIR + 'Supreme_Court_Dockets_Raw_Text_Missing.json'
    scraped = _scrape_dockets(_find_missing_dockets(docket_info))
    with open(missing_path, 'w') as f:
        json.dump(scraped, f, indent=4)
    collect_dockets(docket_info, _load_json(missing_path), parse_modern_docket)

    cleaned = {
        heading: [clean_entry(entry) for entry in entries]
        for heading, entries in docket_info.items()
    }
    data = pd.DataFrame(cleaned)
    data.to_excel(OUTPUT_FILE, index=False)


if __name__ == '__main__':
    main()

        